{
 "metadata": {
  "name": "",
  "signature": "sha256:81aed14b5c7bdf9461390525cbd56f1b2754be3bd424493d7fcd7b81d0350bdf"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "https://groups.google.com/forum/#!topic/python-etl/XRIJovpb6Qc\n",
      "\n",
      "The conversion I'm trying to make is:\n",
      "- If nb_day is not integer:\n",
      "  - if notes contains 'am':\n",
      "    - split line:\n",
      "      1 with full days, modifying date_end = previous date_end - 1\n",
      "      1 with 0.5 day, modifying date_begin = date_end\n",
      "  - if notes contacts 'pm':\n",
      "    - split line:\n",
      "      1 with 0.5 day, modifying date_end = date_begin\n",
      "      1 with full days, modifying date_begin = previous date_begin + 1"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import datetime\n",
      "import petl.interactive as etl\n",
      "print etl.__version__"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "0.26\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data = \"\"\"user_id,date_begin,date_end,nb_days,notes,projet_id\n",
      "user1,2014-07-31,2014-08-07,5.5,5 days + am,cp\n",
      "user2,2014-07-31,2014-08-07,5.5,5 days + pm,cp\n",
      "user3,2014-07-31,2014-08-06,5,5 days,cp\n",
      "\"\"\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "day = datetime.timedelta(days=1)\n",
      "\n",
      "\n",
      "def split_partial_days(row):\n",
      "    if isinstance(row.nb_days, float):\n",
      "        # split out partial days into separate row\n",
      "        if 'am' in row.notes:\n",
      "            # full days\n",
      "            yield (row.user_id, \n",
      "                   row.date_begin, \n",
      "                   row.date_end - day,\n",
      "                   int(row.nb_days),\n",
      "                   row.notes.split('+')[0].strip(), \n",
      "                   row.projet_id)\n",
      "            # partial days\n",
      "            yield (row.user_id, \n",
      "                   row.date_end, \n",
      "                   row.date_end, \n",
      "                   row.nb_days - int(row.nb_days),\n",
      "                   row.notes.split('+')[1].strip(), \n",
      "                   row.projet_id)\n",
      "        if 'pm' in row.notes:\n",
      "            # partial days\n",
      "            yield (row.user_id, \n",
      "                   row.date_begin, \n",
      "                   row.date_begin, \n",
      "                   row.nb_days - int(row.nb_days),\n",
      "                   row.notes.split('+')[1].strip(), \n",
      "                   row.projet_id)\n",
      "            # full days\n",
      "            yield (row.user_id, \n",
      "                   row.date_begin + day, \n",
      "                   row.date_end,\n",
      "                   int(row.nb_days),\n",
      "                   row.notes.split('+')[0].strip(), \n",
      "                   row.projet_id)\n",
      "    else:\n",
      "        # do nothing\n",
      "        yield row\n",
      "\n",
      "        \n",
      "tbl = (etl\n",
      "    .fromcsv(etl.StringSource(data))\n",
      "    .convert(('date_begin', 'date_end'), etl.dateparser('%Y-%m-%d'))\n",
      "    .convert('nb_days', etl.parsenumber)\n",
      "    .rowmapmany(split_partial_days, fields=['user_id', 'date_begin', 'date_end', 'nb_days', 'notes', 'projet_id'])\n",
      ")\n",
      "tbl"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table class='petl'>\r\n",
        "<thead>\r\n",
        "<tr>\r\n",
        "<th>user_id</th>\r\n",
        "<th>date_begin</th>\r\n",
        "<th>date_end</th>\r\n",
        "<th>nb_days</th>\r\n",
        "<th>notes</th>\r\n",
        "<th>projet_id</th>\r\n",
        "</tr>\r\n",
        "</thead>\r\n",
        "<tbody>\r\n",
        "<tr>\r\n",
        "<td>user1</td>\r\n",
        "<td>2014-07-31</td>\r\n",
        "<td>2014-08-06</td>\r\n",
        "<td style='text-align: right'>5</td>\r\n",
        "<td>5 days</td>\r\n",
        "<td>cp</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>user1</td>\r\n",
        "<td>2014-08-07</td>\r\n",
        "<td>2014-08-07</td>\r\n",
        "<td style='text-align: right'>0.5</td>\r\n",
        "<td>am</td>\r\n",
        "<td>cp</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>user2</td>\r\n",
        "<td>2014-07-31</td>\r\n",
        "<td>2014-07-31</td>\r\n",
        "<td style='text-align: right'>0.5</td>\r\n",
        "<td>pm</td>\r\n",
        "<td>cp</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>user2</td>\r\n",
        "<td>2014-08-01</td>\r\n",
        "<td>2014-08-07</td>\r\n",
        "<td style='text-align: right'>5</td>\r\n",
        "<td>5 days</td>\r\n",
        "<td>cp</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>user3</td>\r\n",
        "<td>2014-07-31</td>\r\n",
        "<td>2014-08-06</td>\r\n",
        "<td style='text-align: right'>5</td>\r\n",
        "<td>5 days</td>\r\n",
        "<td>cp</td>\r\n",
        "</tr>\r\n",
        "</tbody>\r\n",
        "</table>\r\n"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n",
        "| 'user_id' | 'date_begin'               | 'date_end'                 | 'nb_days' | 'notes'  | 'projet_id' |\n",
        "+===========+============================+============================+===========+==========+=============+\n",
        "| 'user1'   | datetime.date(2014, 7, 31) | datetime.date(2014, 8, 6)  |         5 | '5 days' | 'cp'        |\n",
        "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n",
        "| 'user1'   | datetime.date(2014, 8, 7)  | datetime.date(2014, 8, 7)  |       0.5 | 'am'     | 'cp'        |\n",
        "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n",
        "| 'user2'   | datetime.date(2014, 7, 31) | datetime.date(2014, 7, 31) |       0.5 | 'pm'     | 'cp'        |\n",
        "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n",
        "| 'user2'   | datetime.date(2014, 8, 1)  | datetime.date(2014, 8, 7)  |         5 | '5 days' | 'cp'        |\n",
        "+-----------+----------------------------+----------------------------+-----------+----------+-------------+\n",
        "| 'user3'   | datetime.date(2014, 7, 31) | datetime.date(2014, 8, 6)  |         5 | '5 days' | 'cp'        |\n",
        "+-----------+----------------------------+----------------------------+-----------+----------+-------------+"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    }
   ],
   "metadata": {}
  }
 ]
}